<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
	<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
		<meta http-equiv="Content-Style-Type" content="text/css" />
		<meta name="generator" content="MediaWiki 1.15alpha" />
		<meta name="keywords" content="Regular expression,Special:Search/Regular expression,.NET Framework,AWK (programming language),Algorithm,American Standard Code for Information Interchange,Apache HTTP Server,Asterisk,Automata theory,Backtracking,Backus–Naur form" />
		<link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Regular_expression&amp;action=edit" />
		<link rel="edit" title="Edit this page" href="/w/index.php?title=Regular_expression&amp;action=edit" />
		<link rel="apple-touch-icon" href="http://en.wikipedia.org/apple-touch-icon.png" />
		<link rel="shortcut icon" href="/favicon.ico" />
		<link rel="search" type="application/opensearchdescription+xml" href="/w/opensearch_desc.php" title="Wikipedia (en)" />
		<link rel="copyright" href="http://www.gnu.org/copyleft/fdl.html" />
		<link rel="alternate" type="application/rss+xml" title="Wikipedia RSS Feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=rss" />
		<link rel="alternate" type="application/atom+xml" title="Wikipedia Atom Feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom" />
		<title>Regular expression - Wikipedia, the free encyclopedia</title>
		<link rel="stylesheet" href="/skins-1.5/common/shared.css?207xx" type="text/css" media="screen" />
		<link rel="stylesheet" href="/skins-1.5/common/commonPrint.css?207xx" type="text/css" media="print" />
		<link rel="stylesheet" href="/skins-1.5/monobook/main.css?207xx" type="text/css" media="screen" />
		<link rel="stylesheet" href="/skins-1.5/chick/main.css?207xx" type="text/css" media="handheld" />
		<!--[if lt IE 5.5000]><link rel="stylesheet" href="/skins-1.5/monobook/IE50Fixes.css?207xx" type="text/css" media="screen" /><![endif]-->
		<!--[if IE 5.5000]><link rel="stylesheet" href="/skins-1.5/monobook/IE55Fixes.css?207xx" type="text/css" media="screen" /><![endif]-->
		<!--[if IE 6]><link rel="stylesheet" href="/skins-1.5/monobook/IE60Fixes.css?207xx" type="text/css" media="screen" /><![endif]-->
		<!--[if IE 7]><link rel="stylesheet" href="/skins-1.5/monobook/IE70Fixes.css?207xx" type="text/css" media="screen" /><![endif]-->
		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Common.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" />
		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Print.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" media="print" />
		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Handheld.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" media="handheld" />
		<link rel="stylesheet" href="/w/index.php?title=MediaWiki:Monobook.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=2678400&amp;action=raw&amp;maxage=2678400" type="text/css" />
		<link rel="stylesheet" href="/w/index.php?title=-&amp;action=raw&amp;maxage=2678400&amp;gen=css" type="text/css" />
		<!--[if lt IE 7]><script type="text/javascript" src="/skins-1.5/common/IEFixes.js?207xx"></script>
		<meta http-equiv="imagetoolbar" content="no" /><![endif]-->

		<script type="text/javascript">/*<![CDATA[*/
		// Global variables describing this page view. Nothing inside this block reads
		// them; presumably the external scripts loaded afterwards do (TODO confirm).

		// Skin name, URL path prefixes and server origin.
		var skin = "monobook";
		var stylepath = "/skins-1.5";
		var wgArticlePath = "/wiki/$1";
		var wgScriptPath = "/w";
		var wgScript = "/w/index.php";
		var wgVariantArticlePath = false;
		var wgActionPaths = {};
		var wgServer = "http://en.wikipedia.org";

		// Identity of the page being shown and the action applied to it.
		var wgCanonicalNamespace = "";
		var wgCanonicalSpecialPageName = false;
		var wgNamespaceNumber = 0;
		var wgPageName = "Regular_expression";
		var wgTitle = "Regular expression";
		var wgAction = "view";
		// NOTE(review): the article id is a string while wgCurRevisionId below is a
		// number; left as-is because consumers of these globals are not visible here.
		var wgArticleId = "25717";
		var wgIsArticle = true;

		// Viewer information; null presumably means no logged-in user (TODO confirm).
		var wgUserName = null;
		var wgUserGroups = null;
		var wgUserLanguage = "en";
		var wgContentLanguage = "en";

		// Site software settings.
		var wgBreakFrames = false;
		var wgCurRevisionId = 281174402;
		var wgVersion = "1.15alpha";
		var wgEnableAPI = true;
		var wgEnableWriteAPI = true;
		var wgSeparatorTransformTable = ["", ""];
		var wgDigitTransformTable = ["", ""];

		// Search-suggestion settings. \x26 is the JavaScript escape for an ampersand,
		// which keeps a literal ampersand out of the XHTML source.
		var wgMWSuggestTemplate = "http://en.wikipedia.org/w/api.php?action=opensearch\x26search={searchTerms}\x26namespace={namespaces}\x26suggest";
		var wgDBname = "enwiki";
		var wgSearchNamespaces = [0];
		var wgMWSuggestMessages = ["with suggestions", "no suggestions"];

		// Edit and move restrictions for this page; both lists are empty here.
		var wgRestrictionEdit = [];
		var wgRestrictionMove = [];
		/*]]>*/</script>

		<script type="text/javascript" src="/skins-1.5/common/wikibits.js?207xx"><!-- wikibits js --></script>
		<!-- Head Scripts -->
		<script type="text/javascript" src="/skins-1.5/common/ajax.js?207xx"></script>
		<script type="text/javascript" src="/skins-1.5/common/mwsuggest.js?207xx"></script>
<script type="text/javascript">/*<![CDATA[*/
// Site-notice globals, both initialised to the empty string. wgNotice is read
// by the inline script inside the siteNotice element in the page body.
var wgNotice = '';
var wgNoticeLocal = '';
/*]]>*/</script>
		<script type="text/javascript" src="http://upload.wikimedia.org/centralnotice/wikipedia/en/centralnotice.js?207xx"></script>
		<script type="text/javascript" src="/w/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=monobook"><!-- site js --></script>
	</head>
<body class="mediawiki ltr ns-0 ns-subject page-Regular_expression skin-monobook">
	<div id="globalWrapper">
		<div id="column-content">
	<div id="content">
		<a name="top" id="top"></a>
		<div id="siteNotice"><script type="text/javascript">/* Write the site notice at this position when wgNotice is not equal to the empty string. */ if (wgNotice != '') { document.writeln(wgNotice); }</script></div>
		<h1 id="firstHeading" class="firstHeading">Regular expression</h1>
		<div id="bodyContent">
			<h3 id="siteSub">From Wikipedia, the free encyclopedia</h3>
			<div id="contentSub"></div>
									<div id="jump-to-nav">Jump to: <a href="#column-one">navigation</a>, <a href="#searchInput">search</a></div>			<!-- start content -->
			<p>In <a href="/wiki/Computing" title="Computing">computing</a>, <b>regular expressions</b> provide a concise and flexible means for identifying strings of text of interest, such as particular characters, words, or patterns of characters. Regular expressions (abbreviated as <b>regex</b> or <b>regexp</b>, with plural forms <b>regexes</b>, <b>regexps</b>, or <b>regexen</b>) are written in a <a href="/wiki/Formal_language" title="Formal language">formal language</a> that can be interpreted by a regular expression processor, a program that either serves as a <a href="/wiki/Parser_generator" title="Parser generator" class="mw-redirect">parser generator</a> or examines text and identifies parts that match the provided <a href="/wiki/Specification_(technical_standard)" title="Specification (technical standard)">specification</a>.</p>
<p>The following examples illustrate a few specifications that could be expressed in a regular expression:</p>
<ul>
<li>the sequence of characters "car" in any context, such as "car", "cartoon", or "bicarbonate"</li>
<li>the word "car" when it appears as an isolated word</li>
<li>the word "car" when preceded by the word "blue" or "red"</li>
<li>a dollar sign immediately followed by one or more digits, and then optionally a period and exactly two more digits</li>
</ul>
<p>Regular expressions can be much more complex than these examples.</p>
<p>Regular expressions are used by many <a href="/wiki/Text_editor" title="Text editor">text editors</a>, utilities, and <a href="/wiki/Programming_languages" title="Programming languages" class="mw-redirect">programming languages</a> to search and manipulate text based on <a href="/wiki/Pattern" title="Pattern">patterns</a>. For example, <a href="/wiki/Perl" title="Perl">Perl</a>, <a href="/wiki/Ruby_(programming_language)" title="Ruby (programming language)">Ruby</a> and <a href="/wiki/Tcl" title="Tcl">Tcl</a> have a powerful regular expression engine built directly into their syntax. Several utilities provided by <a href="/wiki/Unix" title="Unix">Unix</a> distributions—including the editor <a href="/wiki/Ed_(text_editor)" title="Ed (text editor)">ed</a> and the filter <a href="/wiki/Grep" title="Grep">grep</a>—were the first to popularize the concept of regular expressions.</p>
<p>As an example of the syntax, the regular expression <code>\bex</code> can be used to search for all instances of the string "<i>ex</i>" that occur after word boundaries (signified by the <code>\b</code>). Thus in the string "Texts for experts," <code>\bex</code> matches the "<i>ex</i>" in "experts" but not in "Texts" (because the "<i>ex</i>" occurs inside a word and not immediately after a word boundary).</p>
<p>Many modern computing systems provide <a href="/wiki/Wildcard_character" title="Wildcard character">wildcard characters</a> in matching filenames from a <a href="/wiki/File_system" title="File system">file system</a>. This is a core capability of many <a href="/wiki/Shell_(computing)" title="Shell (computing)">command-line shells</a> and is also known as <a href="/wiki/Glob_(programming)" title="Glob (programming)">globbing</a>. Wildcards differ from regular expressions in that they generally only express very limited forms of alternatives.</p>
<table id="toc" class="toc" summary="Contents">
<tr>
<td>
<div id="toctitle">
<h2>Contents</h2>
</div>
<ul>
<li class="toclevel-1"><a href="#Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
<li class="toclevel-1"><a href="#History"><span class="tocnumber">2</span> <span class="toctext">History</span></a></li>
<li class="toclevel-1"><a href="#Formal_language_theory"><span class="tocnumber">3</span> <span class="toctext">Formal language theory</span></a></li>
<li class="toclevel-1"><a href="#Syntax"><span class="tocnumber">4</span> <span class="toctext">Syntax</span></a>
<ul>
<li class="toclevel-2"><a href="#POSIX"><span class="tocnumber">4.1</span> <span class="toctext">POSIX</span></a>
<ul>
<li class="toclevel-3"><a href="#POSIX_Basic_Regular_Expressions"><span class="tocnumber">4.1.1</span> <span class="toctext">POSIX Basic Regular Expressions</span></a></li>
<li class="toclevel-3"><a href="#POSIX_Extended_Regular_Expressions"><span class="tocnumber">4.1.2</span> <span class="toctext">POSIX Extended Regular Expressions</span></a></li>
<li class="toclevel-3"><a href="#POSIX_character_classes"><span class="tocnumber">4.1.3</span> <span class="toctext">POSIX character classes</span></a></li>
</ul>
</li>
<li class="toclevel-2"><a href="#Perl-derivative_regular_expressions"><span class="tocnumber">4.2</span> <span class="toctext">Perl-derivative regular expressions</span></a></li>
<li class="toclevel-2"><a href="#Simple_Regular_Expressions"><span class="tocnumber">4.3</span> <span class="toctext">Simple Regular Expressions</span></a></li>
<li class="toclevel-2"><a href="#Lazy_quantification"><span class="tocnumber">4.4</span> <span class="toctext">Lazy quantification</span></a></li>
</ul>
</li>
<li class="toclevel-1"><a href="#Patterns_for_non-regular_languages"><span class="tocnumber">5</span> <span class="toctext">Patterns for non-regular languages</span></a></li>
<li class="toclevel-1"><a href="#Implementations_and_running_times"><span class="tocnumber">6</span> <span class="toctext">Implementations and running times</span></a></li>
<li class="toclevel-1"><a href="#Regular_expressions_and_Unicode"><span class="tocnumber">7</span> <span class="toctext">Regular expressions and Unicode</span></a></li>
<li class="toclevel-1"><a href="#Uses_of_regular_expressions"><span class="tocnumber">8</span> <span class="toctext">Uses of regular expressions</span></a></li>
<li class="toclevel-1"><a href="#See_also"><span class="tocnumber">9</span> <span class="toctext">See also</span></a></li>
<li class="toclevel-1"><a href="#Notes"><span class="tocnumber">10</span> <span class="toctext">Notes</span></a></li>
<li class="toclevel-1"><a href="#References"><span class="tocnumber">11</span> <span class="toctext">References</span></a></li>
<li class="toclevel-1"><a href="#External_links"><span class="tocnumber">12</span> <span class="toctext">External links</span></a></li>
</ul>
</td>
</tr>
</table>
<script type="text/javascript">
//<![CDATA[
// Runs only when showTocToggle exists on window; it is not defined by any inline
// script on this page, so it presumably comes from an external script (TODO confirm).
// The two label variables are declared before the call, as in the one-line form.
if (window.showTocToggle) {
	var tocShowText = "show";
	var tocHideText = "hide";
	showTocToggle();
}
//]]>
</script>
<p><a name="Basic_concepts" id="Basic_concepts"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=1" title="Edit section: Basic concepts">edit</a>]</span> <span class="mw-headline">Basic concepts</span></h2>
<p>A regular expression, often called a <b>pattern</b>, is an expression that describes a set of strings. Regular expressions are usually used to give a concise description of a <a href="/wiki/Set_(computer_science)" title="Set (computer science)">set</a>, without having to list all of its <a href="/wiki/Data_element" title="Data element">elements</a>. For example, the set containing the three strings "<i>Handel</i>", "<i>Händel</i>", and "<i>Haendel</i>" can be described by the pattern <code>H(ä|ae?)ndel</code> (or alternatively, it is said that the pattern <i>matches</i> each of the three strings). In most formalisms, if there is any regex that matches a particular set, then there is an infinite number of such expressions. Most formalisms provide the following operations to construct regular expressions.</p>
<dl>
<dt>Boolean "or"</dt>
<dd>A <a href="/wiki/Vertical_bar" title="Vertical bar">vertical bar</a> separates alternatives. For example, <code>gray|grey</code> can match "<i>gray</i>" or "<i>grey</i>".</dd>
<dt>Grouping</dt>
<dd><a href="/wiki/Bracket" title="Bracket">Parentheses</a> are used to define the scope and precedence of the operators (among other uses). For example, <code>gray|grey</code> and <code>gr(a|e)y</code> are equivalent patterns which both describe the set of "<i>gray</i>" and "<i>grey</i>".</dd>
<dt>Quantification</dt>
<dd>A quantifier after a token (such as a character) or group specifies how often that preceding element is allowed to occur. The most common quantifiers are the <a href="/wiki/Question_mark" title="Question mark">question mark</a> <code>?</code>, the <a href="/wiki/Asterisk" title="Asterisk">asterisk</a> <code>*</code> (derived from the <a href="/wiki/Kleene_star" title="Kleene star">Kleene star</a>), and the <a href="/wiki/Plus_sign" title="Plus sign" class="mw-redirect">plus sign</a> <code>+</code>.</dd>
</dl>
<dl>
<dd>
<table>
<tr style="vertical-align:top;">
<td style="width:15px;"><code><b>?</b></code></td>
<td>The question mark indicates there is <i>zero or one</i> of the preceding element. For example, <code>colou?r</code> matches both "<i>color</i>" and "<i>colour</i>".</td>
</tr>
<tr style="vertical-align:top;">
<td><code><b>*</b></code></td>
<td>The asterisk indicates there are <i>zero or more</i> of the preceding element. For example, <code>ab*c</code> matches "<i>ac</i>", "<i>abc</i>", "<i>abbc</i>", "<i>abbbc</i>", and so on.</td>
</tr>
<tr style="vertical-align:top;">
<td><code><b>+</b></code></td>
<td>The plus sign indicates that there is <i>one or more</i> of the preceding element. For example, <code>ab+c</code> matches "<i>abc</i>", "<i>abbc</i>", "<i>abbbc</i>", and so on, but not "<i>ac</i>".</td>
</tr>
</table>
</dd>
</dl>
<p>These constructions can be combined to form arbitrarily complex expressions, much like one can construct arithmetical expressions from numbers and the operations <b>+</b>, <b>−</b>, <b>×</b>, and <b>÷</b>. For example, <code>H(ae?|ä)ndel</code> and <code>H(a|ae|ä)ndel</code> are both valid patterns which match the same strings as the earlier example, <code>H(ä|ae?)ndel</code>.</p>
<p>The precise <a href="/wiki/Syntax" title="Syntax">syntax</a> for regular expressions varies among tools and with context; more detail is given in the <a href="#Syntax" title=""><i>Syntax</i> section</a>.</p>
<p><a name="History" id="History"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=2" title="Edit section: History">edit</a>]</span> <span class="mw-headline">History</span></h2>
<p>The origins of regular expressions lie in <a href="/wiki/Automata_theory" title="Automata theory">automata theory</a> and <a href="/wiki/Formal_language" title="Formal language">formal language theory</a>, both of which are part of <a href="/wiki/Theoretical_computer_science" title="Theoretical computer science">theoretical computer science</a>. These fields study models of computation (automata) and ways to describe and classify formal languages. In the 1950s, mathematician <a href="/wiki/Stephen_Cole_Kleene" title="Stephen Cole Kleene">Stephen Cole Kleene</a> described these models using his mathematical notation called <i>regular sets</i>. The <a href="/wiki/SNOBOL" title="SNOBOL">SNOBOL</a> language was an early implementation of <a href="/wiki/Pattern_matching" title="Pattern matching">pattern matching</a>, but not identical to regular expressions. <a href="/wiki/Ken_Thompson" title="Ken Thompson">Ken Thompson</a> built Kleene's notation into the editor <a href="/wiki/QED_(text_editor)" title="QED (text editor)">QED</a> as a means to match patterns in text files. He later added this capability to the Unix editor <a href="/wiki/Ed_(text_editor)" title="Ed (text editor)">ed</a>, which eventually led to the popular search tool <a href="/wiki/Grep" title="Grep">grep</a>'s use of regular expressions ("grep" is a word derived from the command for regular expression searching in the ed editor: <code>g/<i>re</i>/p</code> where <i>re</i> stands for regular expression<sup id="cite_ref-0" class="reference"><a href="#cite_note-0" title=""><span>[</span>1<span>]</span></a></sup>). 
Since that time, many variations of Thompson's original adaptation of regular expressions have been widely used in Unix and Unix-like utilities including <a href="/wiki/Expr" title="Expr">expr</a>, <a href="/wiki/AWK_(programming_language)" title="AWK (programming language)" class="mw-redirect">AWK</a>, <a href="/wiki/Emacs" title="Emacs">Emacs</a>, <a href="/wiki/Vi" title="Vi">vi</a>, and <a href="/wiki/Lex_programming_tool" title="Lex programming tool" class="mw-redirect">lex</a>.</p>
<p><a href="/wiki/Perl" title="Perl">Perl</a> and <a href="/wiki/Tcl" title="Tcl">Tcl</a> regular expressions were derived from a regex library written by <a href="/wiki/Henry_Spencer" title="Henry Spencer">Henry Spencer</a>, though Perl later expanded on Spencer's library to add many new features.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1" title=""><span>[</span>2<span>]</span></a></sup> <a href="/wiki/Philip_Hazel" title="Philip Hazel">Philip Hazel</a> developed <a href="/wiki/Perl_Compatible_Regular_Expressions" title="Perl Compatible Regular Expressions">PCRE</a> (Perl Compatible Regular Expressions), which attempts to closely mimic Perl's regular expression functionality, and is used by many modern tools including <a href="/wiki/PHP" title="PHP">PHP</a> and <a href="/wiki/Apache_HTTP_Server" title="Apache HTTP Server">Apache HTTP Server</a>. Part of the effort in the design of <a href="/wiki/Perl_6" title="Perl 6">Perl 6</a> is to improve Perl's regular expression integration, and to increase their scope and capabilities to allow the definition of <a href="/wiki/Parsing_expression_grammar" title="Parsing expression grammar">parsing expression grammars</a>.<sup id="cite_ref-Apocalypse5_2-0" class="reference"><a href="#cite_note-Apocalypse5-2" title=""><span>[</span>3<span>]</span></a></sup> The result is a mini-language called <a href="/wiki/Perl_6_rules" title="Perl 6 rules">Perl 6 rules</a>, which are used to define Perl 6 grammar as well as provide a tool to programmers in the language. These rules maintain existing features of Perl 5.x regular expressions, but also allow <a href="/wiki/Backus%E2%80%93Naur_form" title="Backus–Naur form" class="mw-redirect">BNF</a>-style definition of a <a href="/wiki/Recursive_descent_parser" title="Recursive descent parser">recursive descent parser</a> via sub-rules.</p>
<p>The use of regular expressions in structured information standards for document and database modeling started in the 1960s and expanded in the 1980s when industry standards like <a href="/wiki/Standard_Generalized_Markup_Language" title="Standard Generalized Markup Language">ISO SGML</a> (preceded by ANSI "GCA 101-1983") consolidated. The kernel of the <a href="/wiki/XML_schema#Validation" title="XML schema">structure specification language</a> standards consists of regular expressions. Simple use is evident in the <a href="/wiki/Document_Type_Definition" title="Document Type Definition">DTD</a> element group syntax.</p>
<p>See also <a href="/wiki/Pattern_matching#History" title="Pattern matching">Pattern matching: History</a>.</p>
<p><a name="Formal_language_theory" id="Formal_language_theory"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=3" title="Edit section: Formal language theory">edit</a>]</span> <span class="mw-headline">Formal language theory</span></h2>
<p>Regular expressions can be expressed in terms of <a href="/wiki/Formal_language" title="Formal language">formal language theory</a>. Regular expressions consist of constants and operators that denote sets of strings and operations over these sets, respectively. Given a finite alphabet Σ the following constants are defined:</p>
<ul>
<li>(<i>empty set</i>) <span class="Unicode">∅</span> denoting the set <span class="Unicode">∅</span>.</li>
<li>(<i>empty string</i>) ε denoting the set containing only the empty string, which has no characters.</li>
<li>(<i><a href="/wiki/String_literal" title="String literal">literal character</a></i>) <i>a</i> in Σ denoting the set containing only the character <i>a</i>.</li>
</ul>
<p>The following operations are defined:</p>
<ul>
<li>(<i>concatenation</i>) <i>RS</i> denoting the set { αβ | α in <i>R</i> and β in <i>S</i> }. For example {"ab", "c"}{"d", "ef"} = {"abd", "abef", "cd", "cef"}.</li>
<li>(<i>alternation</i>) <i>R|S</i> denoting the set union of <i>R</i> and <i>S</i>. Many textbooks use the symbols <span class="Unicode">∪</span>, <span class="Unicode">+</span>, or <span class="Unicode">∨</span> for alternation instead of the vertical bar. For example {"ab", "c"}<span class="Unicode">∪</span>{"d", "ef"} = {"ab", "c", "d", "ef"}</li>
<li>(<i><a href="/wiki/Kleene_star" title="Kleene star">Kleene star</a></i>) <i>R</i>* denoting the smallest <a href="/wiki/Subset" title="Subset">superset</a> of <i>R</i> that contains ε and is <a href="/wiki/Closure_(mathematics)" title="Closure (mathematics)">closed</a> under string concatenation. This is the set of all strings that can be made by concatenating zero or more strings in <i>R</i>. For example, {"ab", "c"}* = {ε, "ab", "c", "abab", "abc", "cab", "cc", "ababab", "abcab", ... }.</li>
</ul>
<p>The above constants and operators form a <a href="/wiki/Kleene_algebra" title="Kleene algebra">Kleene algebra</a>.</p>
<p>To avoid brackets it is assumed that the Kleene star has the highest priority, then concatenation and then set union. If there is no ambiguity then brackets may be omitted. For example, <code>(ab)c</code> can be written as <code>abc</code>, and <code>a|(b(c*))</code> can be written as <code>a|bc*</code>.</p>
<p><b>Examples:</b></p>
<ul>
<li><code>a|b*</code> denotes {ε, <i>a</i>, <i>b</i>, <i>bb</i>, <i>bbb</i>, ...}</li>
<li><code>(a|b)*</code> denotes the set of all strings with no symbols other than <i>a</i> and <i>b</i>, including the empty string: {ε, <i>a</i>, <i>b</i>, <i>aa</i>, <i>ab</i>, <i>ba</i>, <i>bb</i>, <i>aaa</i>, ...}</li>
<li><code>ab*(c|ε)</code> denotes the set of strings starting with <i>a</i>, then zero or more <i>b</i>s and finally optionally a <i>c</i>: {<i>a</i>, <i>ac</i>, <i>ab</i>, <i>abc</i>, <i>abb</i>, <i>abbc</i>, ...}</li>
</ul>
<p>The formal definition of regular expressions is purposely parsimonious and avoids defining the redundant quantifiers <code>?</code> and <code>+</code>, which can be expressed as follows: <code>a+</code> = <code>aa*</code>, and <code>a?</code> = <code>(a|ε)</code>. Sometimes the complement operator ~ is added; ~<i>R</i> denotes the set of all strings over Σ that are not in <i>R</i>. The complement operator is redundant, as it can always be expressed by using the other operators (although the process for computing such a representation is complex, and the result may be exponentially larger).</p>
<p>Regular expressions in this sense can express the <a href="/wiki/Regular_language" title="Regular language">regular languages</a>, exactly the class of languages accepted by <a href="/wiki/Finite_state_machine" title="Finite state machine">finite state automata</a>. There is, however, a significant difference in compactness. Some classes of regular languages can only be described by automata that grow <a href="/wiki/Exponential_growth" title="Exponential growth">exponentially</a> in size, while the length of the required regular expressions grows only linearly. Regular expressions correspond to the type-3 <a href="/wiki/Formal_grammar" title="Formal grammar">grammars</a> of the <a href="/wiki/Chomsky_hierarchy" title="Chomsky hierarchy">Chomsky hierarchy</a>. On the other hand, there is a simple mapping from regular expressions to <a href="/wiki/Nondeterministic_finite_state_machine" title="Nondeterministic finite state machine">nondeterministic finite automata</a> (NFAs) that does not lead to such a blowup in size; for this reason NFAs are often used as alternative representations of regular expressions.</p>
<p>We can also study expressive power within the formalism. As the examples show, different regular expressions can express the same language: the formalism is redundant.</p>
<p>It is possible to write an <a href="/wiki/Algorithm" title="Algorithm">algorithm</a> which, for two given regular expressions, decides whether the described languages are equal: it reduces each expression to a minimal deterministic finite state machine and determines whether the two machines are <a href="/wiki/Isomorphism" title="Isomorphism">isomorphic</a> (equivalent).</p>
<p>To what extent can this redundancy be eliminated? Can we find an interesting subset of regular expressions that is still fully expressive? <a href="/wiki/Kleene_star" title="Kleene star">Kleene star</a> and <a href="/wiki/Union_(set_theory)" title="Union (set theory)">set union</a> are obviously required, but perhaps we can restrict their use. This turns out to be a surprisingly difficult problem. As simple as the regular expressions are, it turns out there is no method to systematically rewrite them to some normal form. The lack of axiomatization in the past led to the <a href="/wiki/Star_height_problem" title="Star height problem">star height problem</a>. Recently, Cornell University professor Dexter Kozen axiomatized regular expressions with <a href="/wiki/Kleene_algebra" title="Kleene algebra">Kleene algebra</a>.</p>
<p>It is worth noting that many real-world "regular expression" engines implement features that cannot be expressed in the regular expression algebra; see <a href="#Patterns_for_non-regular_languages" title="">below</a> for more on this.</p>
<p><a name="Syntax" id="Syntax"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=4" title="Edit section: Syntax">edit</a>]</span> <span class="mw-headline">Syntax</span></h2>
<p><a name="POSIX" id="POSIX"></a></p>
<h3><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=5" title="Edit section: POSIX">edit</a>]</span> <span class="mw-headline">POSIX</span></h3>
<p><a name="POSIX_Basic_Regular_Expressions" id="POSIX_Basic_Regular_Expressions"></a></p>
<h4><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=6" title="Edit section: POSIX Basic Regular Expressions">edit</a>]</span> <span class="mw-headline">POSIX Basic Regular Expressions</span></h4>
<p>Traditional <a href="/wiki/Unix" title="Unix">Unix</a> regular expression syntax followed common conventions but often differed from tool to tool. The <a href="/wiki/Institute_of_Electrical_and_Electronics_Engineers" title="Institute of Electrical and Electronics Engineers">IEEE</a> <a href="/wiki/POSIX" title="POSIX">POSIX</a> Basic Regular Expressions (BRE) standard (released alongside an alternative flavor called Extended Regular Expressions or ERE) was designed mostly for backward compatibility with the traditional (Simple Regular Expression) syntax but provided a common standard which has since been adopted as the default syntax of many Unix regular expression tools, though there is often some variation or additional features. Many such tools also provide support for ERE syntax with <a href="/wiki/Command_line_argument" title="Command line argument" class="mw-redirect">command line arguments</a>.</p>
<p>In the BRE syntax, most characters are treated as <a href="/wiki/Literal" title="Literal">literals</a>&#160;— they match only themselves (i.e., <code>a</code> matches "<i>a</i>"). The exceptions, listed below, are called <a href="/wiki/Metacharacter" title="Metacharacter">metacharacters</a> or metasequences.</p>
<table class="wikitable">
<!-- Header row: scope="col" marks each header as applying to the cells in its column. -->
<tr>
<th scope="col">Metacharacter</th>
<th scope="col">Description</th>
</tr>
<tr valign="top">
<td><code>.</code></td>
<td>Matches any single character (many applications exclude <a href="/wiki/Newline" title="Newline">newlines</a>, and exactly which characters are considered newlines is flavor, character encoding, and platform specific, but it is safe to assume that the line feed character is included). Within POSIX bracket expressions, the dot character matches a literal dot. For example, <code>a.c</code> matches "<i>abc</i>", etc., but <code>[a.c]</code> matches only "<i>a</i>", "<i>.</i>", or "<i>c</i>".</td>
</tr>
<tr valign="top">
<td><code>[&#160;]</code></td>
<td>A bracket expression. Matches a single character that is contained within the brackets. For example, <code>[abc]</code> matches "<i>a</i>", "<i>b</i>", or "<i>c</i>". <code>[a-z]</code> specifies a range which matches any lowercase letter from "<i>a</i>" to "<i>z</i>". These forms can be mixed: <code>[abcx-z]</code> matches "<i>a</i>", "<i>b</i>", "<i>c</i>", "<i>x</i>", "<i>y</i>", or "<i>z</i>", as does <code>[a-cx-z]</code>.
<p>The <code>-</code> character is treated as a literal character if it is the last or the first character within the brackets, or if it is escaped with a backslash: <code>[abc-]</code>, <code>[-abc]</code>, or <code>[a\-bc]</code>.</p>
</td>
</tr>
<tr valign="top">
<td><code>[^&#160;]</code></td>
<td>Matches a single character that is not contained within the brackets. For example, <code>[^abc]</code> matches any character other than "<i>a</i>", "<i>b</i>", or "<i>c</i>". <code>[^a-z]</code> matches any single character that is not a lowercase letter from "<i>a</i>" to "<i>z</i>". As above, literal characters and ranges can be mixed.</td>
</tr>
<tr valign="top">
<td><code>^</code></td>
<td>Matches the starting position within the string. In line-based tools, it matches the starting position of any line.</td>
</tr>
<tr valign="top">
<td><code>$</code></td>
<td>Matches the ending position of the string or the position just before a string-ending newline. In line-based tools, it matches the ending position of any line.</td>
</tr>
<tr valign="top">
<td><span style="white-space:nowrap;">BRE: <code>\(&#160;\)</code></span><br />
<span style="white-space:nowrap;">ERE: <code>(&#160;)</code></span></td>
<td>Defines a marked subexpression. The string matched within the parentheses can be recalled later (see the next entry, <code>\<i>n</i></code>). A marked subexpression is also called a block or capturing group.</td>
</tr>
<tr valign="top">
<td><code>\<i>n</i></code></td>
<td>Matches what the <i>n</i>th marked subexpression matched, where <i>n</i> is a digit from 1 to 9. This construct is theoretically <b>irregular</b> and was not adopted in the POSIX ERE syntax. Some tools allow referencing more than nine capturing groups.</td>
</tr>
<tr valign="top">
<td><code>*</code></td>
<td>Matches the preceding element zero or more times. For example, <code>ab*c</code> matches "<i>ac</i>", "<i>abc</i>", "<i>abbbc</i>", etc. <code>[xyz]*</code> matches "", "<i>x</i>", "<i>y</i>", "<i>z</i>", "<i>zx</i>", "<i>zyx</i>", "<i>xyzzy</i>", and so on. <code>\(ab\)*</code> matches "", "<i>ab</i>", "<i>abab</i>", "<i>ababab</i>", and so on.</td>
</tr>
<tr valign="top">
<td><span style="white-space:nowrap;">BRE: <code>\{<i>m</i>,<i>n</i>\}</code></span><br />
<span style="white-space:nowrap;">ERE: <code>{<i>m</i>,<i>n</i>}</code></span></td>
<td>Matches the preceding element at least <i>m</i> and not more than <i>n</i> times. For example, <code>a\{3,5\}</code> matches only "<i>aaa</i>", "<i>aaaa</i>", and "<i>aaaaa</i>". This is not found in a few older instances of regular expressions.</td>
</tr>
</table>
<p><b>Examples:</b></p>
<ul>
<li><code>.at</code> matches any three-character string ending with "at", including "<i>hat</i>", "<i>cat</i>", and "<i>bat</i>".</li>
<li><code>[hc]at</code> matches "<i>hat</i>" and "<i>cat</i>".</li>
<li><code>[^b]at</code> matches all strings matched by <code>.at</code> except "<i>bat</i>".</li>
<li><code>^[hc]at</code> matches "<i>hat</i>" and "<i>cat</i>", but only at the beginning of the string or line.</li>
<li><code>[hc]at$</code> matches "<i>hat</i>" and "<i>cat</i>", but only at the end of the string or line.</li>
</ul>
<p><a name="POSIX_Extended_Regular_Expressions" id="POSIX_Extended_Regular_Expressions"></a></p>
<h4><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=7" title="Edit section: POSIX Extended Regular Expressions">edit</a>]</span> <span class="mw-headline">POSIX Extended Regular Expressions</span></h4>
<p>The meaning of metacharacters <a href="/wiki/Escape_sequence" title="Escape sequence">escaped</a> with a backslash is reversed for some characters in the POSIX Extended Regular Expression (ERE) syntax. With this syntax, a backslash causes the metacharacter to be treated as a literal character. So, for example, <code>\(&#160;\)</code> is now <code>(&#160;)</code> and <code>\{&#160;\}</code> is now <code>{&#160;}</code>. Additionally, support is removed for <code>\<em>n</em></code> backreferences and the following metacharacters are added:</p>
<table class="wikitable">
<tr>
<th>Metacharacter</th>
<th>Description</th>
</tr>
<tr valign="top">
<td><code>?</code></td>
<td>Matches the preceding element zero or one time. For example, <code>ba?</code> matches "<i>b</i>" or "<i>ba</i>".</td>
</tr>
<tr>
<td><code>+</code></td>
<td>Matches the preceding element one or more times. For example, <code>ba+</code> matches "<i>ba</i>", "<i>baa</i>", "<i>baaa</i>", and so on.</td>
</tr>
<tr>
<td><code>|</code></td>
<td>The choice (aka alternation or set union) operator matches either the expression before or the expression after the operator. For example, <code>abc|def</code> matches "<i>abc</i>" or "<i>def</i>".</td>
</tr>
</table>
<p><b>Examples:</b></p>
<ul>
<li><code>[hc]+at</code> matches "<i>hat</i>", "<i>cat</i>", "<i>hhat</i>", "<i>chat</i>", "<i>hcat</i>", "<i>ccchat</i>", and so on, but not "<i>at</i>".</li>
<li><code>[hc]?at</code> matches "<i>hat</i>", "<i>cat</i>", and "<i>at</i>".</li>
<li><code>cat|dog</code> matches "<i>cat</i>" or "<i>dog</i>".</li>
</ul>
<p>POSIX Extended Regular Expressions can often be used with modern Unix utilities by including the <a href="/wiki/Command_line" title="Command line" class="mw-redirect">command line</a> flag <var>-E</var>.</p>
<p><a name="POSIX_character_classes" id="POSIX_character_classes"></a></p>
<h4><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=8" title="Edit section: POSIX character classes">edit</a>]</span> <span class="mw-headline">POSIX character classes</span></h4>
<p>Since many ranges of characters depend on the chosen locale setting (e.g., in some settings letters are organized as <i>abc...zABC...Z</i>, while in some others as <i>aAbBcC...zZ</i>), the POSIX standard defines some classes or categories of characters as shown in the following table:</p>
<table class="wikitable">
<tr>
<th>POSIX</th>
<th>Perl</th>
<th>ASCII</th>
<th>Description</th>
</tr>
<tr>
<td><code>[:alnum:]</code></td>
<td></td>
<td><code>[A-Za-z0-9]</code></td>
<td>Alphanumeric characters</td>
</tr>
<tr>
<td><code>[:word:]</code></td>
<td><code>\w</code></td>
<td><code>[A-Za-z0-9_]</code></td>
<td>Alphanumeric characters plus "_"</td>
</tr>
<tr>
<td></td>
<td><code>\W</code></td>
<td><code>[^\w]</code></td>
<td>non-word character</td>
</tr>
<tr>
<td><code>[:alpha:]</code></td>
<td></td>
<td><code>[A-Za-z]</code></td>
<td>Alphabetic characters</td>
</tr>
<tr>
<td><code>[:blank:]</code></td>
<td></td>
<td><code>[ \t]</code></td>
<td>Space and tab</td>
</tr>
<tr>
<td><code>[:cntrl:]</code></td>
<td></td>
<td><code>[\x00-\x1F\x7F]</code></td>
<td>Control characters</td>
</tr>
<tr>
<td><code>[:digit:]</code></td>
<td><code>\d</code></td>
<td><code>[0-9]</code></td>
<td>Digits</td>
</tr>
<tr>
<td></td>
<td><code>\D</code></td>
<td><code>[^\d]</code></td>
<td>non-digit</td>
</tr>
<tr>
<td><code>[:graph:]</code></td>
<td></td>
<td><code>[\x21-\x7E]</code></td>
<td>Visible characters</td>
</tr>
<tr>
<td><code>[:lower:]</code></td>
<td></td>
<td><code>[a-z]</code></td>
<td>Lowercase letters</td>
</tr>
<tr>
<td><code>[:print:]</code></td>
<td></td>
<td><code>[\x20-\x7E]</code></td>
<td>Visible characters and spaces</td>
</tr>
<tr>
<td><code>[:punct:]</code></td>
<td></td>
<td><code>[-!"#$%&amp;'()*+,./:;&lt;=&gt;?@[\\\]^_`{|}~]</code></td>
<td>Punctuation characters</td>
</tr>
<tr>
<td><code>[:space:]</code></td>
<td><code>\s</code></td>
<td><code>[ \t\r\n\v\f]</code></td>
<td>Whitespace characters</td>
</tr>
<tr>
<td></td>
<td><code>\S</code></td>
<td><code>[^\s]</code></td>
<td>non-whitespace character</td>
</tr>
<tr>
<td><code>[:upper:]</code></td>
<td></td>
<td><code>[A-Z]</code></td>
<td>Uppercase letters</td>
</tr>
<tr>
<td><code>[:xdigit:]</code></td>
<td></td>
<td><code>[A-Fa-f0-9]</code></td>
<td>Hexadecimal digits</td>
</tr>
</table>
<p>POSIX character classes can only be used within bracket expressions. For example, <code>[[:upper:]ab]</code> matches the uppercase letters and lowercase "<i>a</i>" and "<i>b</i>".</p>
<p>In Perl regular expressions, <code>[:print:]</code> matches <code>[:graph:]</code> union <code>[:space:]</code>. An additional non-POSIX class understood by some tools is <code>[:word:]</code>, which is usually defined as <code>[:alnum:]</code> plus underscore. This reflects the fact that in many programming languages these are the characters that may be used in identifiers. The editor <a href="/wiki/Vim_(text_editor)" title="Vim (text editor)">Vim</a> further distinguishes <i>word</i> and <i>word-head</i> classes (using the notation <code>\w</code> and <code>\h</code>) since in many programming languages the characters that can begin an identifier are not the same as those that can occur in other positions.</p>
<p>Note that what the POSIX regular expression standards call <i>character classes</i> are commonly referred to as <i>POSIX character classes</i> in other regular expression flavors which support them. With most other regular expression flavors, the term <i>character class</i> is used to describe what POSIX calls <i>bracket expressions</i>.</p>
<p><a name="Perl-derivative_regular_expressions" id="Perl-derivative_regular_expressions"></a></p>
<h3><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=9" title="Edit section: Perl-derivative regular expressions">edit</a>]</span> <span class="mw-headline">Perl-derivative regular expressions</span></h3>
<p><a href="/wiki/Perl" title="Perl">Perl</a> has a more consistent and richer syntax than the POSIX basic (BRE) and extended (ERE) regular expression standards. An example of its consistency is that <code>\</code> always escapes a non-alphanumeric character. An example of functionality possible with Perl but not with POSIX-compliant regular expressions is the concept of lazy quantification (see the section on lazy quantification below).</p>
<p>Due largely to its expressive power, many other utilities and programming languages have adopted syntax similar to Perl's&#160;— for example, <a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java</a>, <a href="/wiki/JavaScript" title="JavaScript">JavaScript</a>, <a href="/wiki/Perl_Compatible_Regular_Expressions" title="Perl Compatible Regular Expressions">PCRE</a>, <a href="/wiki/Python_(programming_language)" title="Python (programming language)">Python</a>, <a href="/wiki/Ruby_(programming_language)" title="Ruby (programming language)">Ruby</a>, <a href="/wiki/Microsoft" title="Microsoft">Microsoft</a>'s <a href="/wiki/.NET_Framework" title=".NET Framework">.NET Framework</a>, and the <a href="/wiki/World_Wide_Web_Consortium" title="World Wide Web Consortium">W3C's</a> <a href="/wiki/XML_Schema_(W3C)" title="XML Schema (W3C)">XML Schema</a> all use regular expression syntax similar to Perl's. Some languages and tools such as <a href="/wiki/PHP" title="PHP">PHP</a> support multiple regular expression flavors. Perl-derivative regular expression implementations are not identical, and many implement only a subset of Perl's features. With Perl 5.10, this process has come full circle with Perl incorporating syntax extensions originally from Python, PCRE, the .NET Framework, and Java.</p>
<p><a name="Simple_Regular_Expressions" id="Simple_Regular_Expressions"></a></p>
<h3><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=10" title="Edit section: Simple Regular Expressions">edit</a>]</span> <span class="mw-headline">Simple Regular Expressions</span></h3>
<p><b>Simple Regular Expressions</b> is a syntax that may be used by historical versions of application programs, and may be supported within some applications for the purpose of providing backward compatibility, but these forms of regular expression syntax are considered deprecated<sup id="cite_ref-3" class="reference"><a href="#cite_note-3" title=""><span>[</span>4<span>]</span></a></sup> and should not be used.</p>
<p><a name="Lazy_quantification" id="Lazy_quantification"></a></p>
<h3><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=11" title="Edit section: Lazy quantification">edit</a>]</span> <span class="mw-headline">Lazy quantification</span></h3>
<p>The standard quantifiers in regular expressions are greedy, meaning they match as much as they can, only giving back as necessary to match the remainder of the regex. For example, someone new to regexes wishing to find the first instance of an item between &lt; and &gt; symbols in this example:</p>
<pre>
Another whale explosion occurred on &lt;January 26&gt;, &lt;2004&gt;.
</pre>
<p>...would likely come up with the pattern <code>&lt;.*&gt;</code>, or similar. However, this pattern will actually return "<i>&lt;January 26&gt;, &lt;2004&gt;</i>" instead of the "<i>&lt;January 26&gt;</i>" which might be expected, because the <code>*</code> quantifier is greedy&#160;— it will consume as many characters as possible from the input, and "<i>January 26&gt;, &lt;2004</i>" has more characters than "<i>January 26</i>".</p>
<p>Though this problem can be avoided in a number of ways (e.g., by specifying the text that is <i>not</i> to be matched: <code>&lt;[^&gt;]*&gt;</code>), modern regular expression tools allow a quantifier to be specified as <i>lazy</i> (also known as <i>non-greedy</i>, <i>reluctant</i>, <i>minimal</i>, or <i>ungreedy</i>) by putting a question mark after the quantifier (e.g., <code>&lt;.*?&gt;</code>), or by using a modifier which reverses the greediness of quantifiers (though changing the meaning of the standard quantifiers can be confusing). By using a lazy quantifier, the expression tries the minimal match first. Though in the previous example lazy matching is used to select one of many matching results, in some cases it can also be used to improve performance when greedy matching would require more <a href="/wiki/Backtracking" title="Backtracking">backtracking</a>.</p>
<p><a name="Patterns_for_non-regular_languages" id="Patterns_for_non-regular_languages"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=12" title="Edit section: Patterns for non-regular languages">edit</a>]</span> <span class="mw-headline">Patterns for non-regular languages</span></h2>
<p>Many features found in modern regular expression libraries provide an expressive power that far exceeds the <a href="/wiki/Regular_language" title="Regular language">regular languages</a>. For example, the ability to group subexpressions with parentheses and recall the value they match in the same expression means that a pattern can match strings of repeated words like "<i>papa</i>" or "<i>WikiWiki</i>", called <i>squares</i> in formal language theory. The pattern for these strings is <code>(.*)\1</code>. However, the language of squares is not regular, nor is it <a href="/wiki/Context-free_language" title="Context-free language">context-free</a>. <a href="/wiki/Pattern_matching" title="Pattern matching">Pattern matching</a> with an unbounded number of back references, as supported by numerous modern tools, is <a href="/wiki/NP-hard" title="NP-hard">NP-hard</a>.</p>
<p>However, many tools, libraries, and engines that provide such constructions still use the term <i>regular expression</i> for their patterns. This has led to a nomenclature where the term regular expression has different meanings in <a href="/wiki/Formal_language" title="Formal language">formal language theory</a> and pattern matching. For this reason, some people have taken to using the term <i>regex</i> or simply <i>pattern</i> to describe the latter. <a href="/wiki/Larry_Wall" title="Larry Wall">Larry Wall</a> (author of Perl) writes in Apocalypse 5:</p>
<table style="margin:auto; border-collapse:collapse; border-style:none; background-color:transparent;" class="cquote">
<tr>
<td width="20" valign="top" style="color:#B2B7F2;font-size:35px;font-family:'Times New Roman',serif;font-weight:bold;text-align:left;padding:10px 10px;">“</td>
<td valign="top" style="padding:4px 10px;">'Regular expressions' [...] are only marginally related to real regular expressions. Nevertheless, the term has grown with the capabilities of our pattern matching engines, so I'm not going to try to fight linguistic necessity here. I will, however, generally call them "regexes" (or "regexen", when I'm in an Anglo-Saxon mood).<sup id="cite_ref-Apocalypse5_2-1" class="reference"><a href="#cite_note-Apocalypse5-2" title=""><span>[</span>3<span>]</span></a></sup></td>
<td width="20" valign="bottom" style="color:#B2B7F2;font-size:36px;font-family:'Times New Roman',serif;font-weight:bold;text-align:right;padding:10px 10px;">”</td>
</tr>
</table>
<p><a name="Implementations_and_running_times" id="Implementations_and_running_times"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=13" title="Edit section: Implementations and running times">edit</a>]</span> <span class="mw-headline">Implementations and running times</span></h2>
<p>There are at least three essentially different <a href="/wiki/Algorithm" title="Algorithm">algorithms</a> that decide if and how a given regular expression matches a string.</p>
<p>The oldest and fastest two rely on a result in formal language theory that allows every <a href="/wiki/Nondeterministic_finite_state_machine" title="Nondeterministic finite state machine">nondeterministic finite state machine</a> (NFA) to be transformed into a <a href="/wiki/Deterministic_finite_state_machine" title="Deterministic finite state machine" class="mw-redirect">deterministic finite state machine</a> (DFA). The DFA can be constructed explicitly and then run on the resulting input string one symbol at a time. Constructing the DFA for a regular expression of size <i>m</i> has the time and memory cost of <a href="/wiki/Big_O_notation" title="Big O notation"><i>O</i></a>(<i>2<sup>m</sup></i>), but it can be run on a string of size <i>n</i> in time <i>O</i>(<i>n</i>). An alternative approach is to simulate the NFA directly, essentially building each DFA state on demand and then discarding it at the next step, possibly with caching. This keeps the DFA implicit and avoids the exponential construction cost, but running cost rises to <i>O</i>(<i>nm</i>). The explicit approach is called the DFA algorithm and the implicit approach the NFA algorithm. As both can be seen as different ways of executing the same DFA, they are also often called the DFA algorithm without making a distinction. These algorithms are fast, but using them for recalling grouped subexpressions, lazy quantification, and similar features is tricky.<sup id="cite_ref-4" class="reference"><a href="#cite_note-4" title=""><span>[</span>5<span>]</span></a></sup><sup id="cite_ref-5" class="reference"><a href="#cite_note-5" title=""><span>[</span>6<span>]</span></a></sup></p>
<p>The third algorithm is to match the pattern against the input string by <a href="/wiki/Backtracking" title="Backtracking">backtracking</a>. This algorithm is commonly called NFA, but this terminology can be confusing. Its running time can be exponential, which simple implementations exhibit when matching against expressions like <code>(a|aa)*b</code> that contain both alternation and unbounded quantification and force the algorithm to consider an exponentially increasing number of sub-cases. More complex implementations will often identify and speed up or abort common cases where they would otherwise run slowly.</p>
<p>Although backtracking implementations only give an exponential guarantee in the worst case, they provide much greater flexibility and expressive power. For example, any implementation which allows the use of backreferences, or implements the various extensions introduced by Perl, must use a backtracking implementation.</p>
<p>Some implementations try to provide the best of both algorithms by first running a fast DFA match to see if the string matches the regular expression at all, and only in that case performing a potentially slower backtracking match.</p>
<p><a name="Regular_expressions_and_Unicode" id="Regular_expressions_and_Unicode"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=14" title="Edit section: Regular expressions and Unicode">edit</a>]</span> <span class="mw-headline">Regular expressions and Unicode</span></h2>
<p>Regular expressions were originally used with <a href="/wiki/American_Standard_Code_for_Information_Interchange" title="American Standard Code for Information Interchange" class="mw-redirect">ASCII</a> characters. Many regular expression engines can now handle <a href="/wiki/Unicode" title="Unicode">Unicode</a>. In most respects it makes no difference what the character set is, but some issues do arise when extending regular expressions to support Unicode.</p>
<ul>
<li>Supported encoding. Some regular expression libraries expect the <a href="/wiki/UTF-8" title="UTF-8">UTF-8</a> encoding, while others might expect <a href="/wiki/UTF-16" title="UTF-16" class="mw-redirect">UTF-16</a>, or <a href="/wiki/UTF-32" title="UTF-32" class="mw-redirect">UTF-32</a>.</li>
</ul>
<ul>
<li>Supported Unicode range. Many regular expression engines support only the <a href="/wiki/Mapping_of_Unicode_characters#Basic_Multilingual_Plane" title="Mapping of Unicode characters">Basic Multilingual Plane</a>, that is, the characters which can be encoded with only 16 bits. Currently, only a few regular expression engines can handle the full 21-bit Unicode range.</li>
</ul>
<ul>
<li>Extending ASCII-oriented constructs to Unicode. For example, in ASCII-based implementations, character ranges of the form <code>[x-y]</code> are valid wherever <i>x</i> and <i>y</i> are codepoints in the range [0x00,0x7F] and codepoint(x) ≤ codepoint(y). The natural extension of such character ranges to Unicode would simply change the requirement that the endpoints lie in [0x00,0x7F] to the requirement that they lie in [0,0x10FFFF]. However, in practice this is often not the case. Some implementations, such as that of <a href="/wiki/Gawk" title="Gawk" class="mw-redirect">gawk</a>, do not allow character ranges to cross Unicode blocks. A range like [0x61,0x7F] is valid since both endpoints fall within the Basic Latin block, as is [0x0530,0x0560] since both endpoints fall within the Armenian block, but a range like [0x0061,0x0532] is invalid since it includes multiple Unicode blocks. Other engines, such as that of the <a href="/wiki/Vim_(text_editor)" title="Vim (text editor)">Vim</a> editor, allow block-crossing but limit the number of characters in a range to 128.</li>
</ul>
<ul>
<li>Case insensitivity. Some case-insensitivity flags affect only the ASCII characters. Other flags affect all characters. Some engines have two different flags, one for ASCII, the other for Unicode. Exactly which characters belong to the POSIX classes also varies.</li>
</ul>
<ul>
<li>Cousins of case insensitivity. As the English alphabet has case distinction, case insensitivity became a logical feature in text searching. Unicode introduced alphabetic scripts without case like <a href="/wiki/Devan%C4%81gar%C4%AB" title="Devanāgarī" class="mw-redirect">Devanagari</a>. For these, case sensitivity is not applicable. For scripts like Chinese, another distinction seems logical: between traditional and simplified. In Arabic scripts, insensitivity to initial, medial, final and isolated position may be desired.</li>
</ul>
<ul>
<li>Normalization. Unicode introduced combining characters. Like old typewriters, plain letters can be followed by non-spacing accent symbols to form a single accented letter. As a consequence, two different code sequences can result in identical character display.</li>
</ul>
<ul>
<li>New control codes. Unicode introduced, amongst others, byte order marks and text direction markers. These codes might have to be dealt with in a special way.</li>
</ul>
<ul>
<li>Introduction of character classes for Unicode blocks and Unicode general character properties. In <a href="/wiki/Perl" title="Perl">Perl</a> and the <code><a href="http://java.sun.com/javase/6/docs/api/java/util/regex/package-summary.html" class="external text" title="http://java.sun.com/javase/6/docs/api/java/util/regex/package-summary.html" rel="nofollow">java.util.regex</a></code> library, classes of the form <code>\p{InX}</code> match characters in block <i>X</i> and <code>\P{InX}</code> match the opposite. Similarly, <code>\p{Armenian}</code> matches any character in the Armenian block, and <code>\p{X}</code> matches any character with the general character property <i>X</i>. For example, <code>\p{Lu}</code> matches any upper-case letter.</li>
</ul>
<p><a name="Uses_of_regular_expressions" id="Uses_of_regular_expressions"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=15" title="Edit section: Uses of regular expressions">edit</a>]</span> <span class="mw-headline">Uses of regular expressions</span></h2>
<p>Regular expressions are useful in the production of <a href="/wiki/Syntax_highlighting" title="Syntax highlighting">syntax highlighting</a> systems, <a href="/wiki/Data_validation" title="Data validation">data validation</a>, and many other tasks.</p>
<p>While regular expressions would be useful on <a href="/wiki/Search_engine_(computing)" title="Search engine (computing)">search engines</a> such as <a href="/wiki/Google" title="Google">Google</a> or <a href="/wiki/Live_Search" title="Live Search">Live Search</a>, processing them across the entire database could consume excessive computer resources depending on the complexity and design of the regex. Although in many cases system administrators can run regex-based queries internally, most search engines do not offer regex support to the public. A notable exception is <a href="/wiki/Google_Code_Search" title="Google Code Search">Google Code Search</a>.</p>
<p><a name="See_also" id="See_also"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=16" title="Edit section: See also">edit</a>]</span> <span class="mw-headline">See also</span></h2>
<ul>
<li><a href="/wiki/Comparison_of_regular_expression_engines" title="Comparison of regular expression engines">Comparison of regular expression engines</a></li>
<li><a href="/wiki/Extended_Backus%E2%80%93Naur_form" title="Extended Backus–Naur form" class="mw-redirect">Extended Backus–Naur form</a></li>
<li><a href="/wiki/List_of_regular_expression_software" title="List of regular expression software">List of regular expression software</a></li>
<li><a href="/wiki/Regular_expression_examples" title="Regular expression examples">Regular expression examples</a></li>
<li><a href="/wiki/Regular_tree_grammar" title="Regular tree grammar">Regular tree grammar</a></li>
<li><a href="/wiki/Regular_language" title="Regular language">Regular language</a></li>
</ul>
<p><a name="Notes" id="Notes"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=17" title="Edit section: Notes">edit</a>]</span> <span class="mw-headline">Notes</span></h2>
<div class="references-small">
<ol class="references">
<li id="cite_note-0"><b><a href="#cite_ref-0" title="">^</a></b> <cite style="font-style:normal" class="web" id="CITEREF.5B.5BEric_S._Raymond.7CRaymond.2C_Eric_S..5D.5D_citing_.5B.5BDennis_Ritchie.5D.5D2003"><a href="/wiki/Eric_S._Raymond" title="Eric S. Raymond">Raymond, Eric S.</a> citing <a href="/wiki/Dennis_Ritchie" title="Dennis Ritchie">Dennis Ritchie</a> (2003). <a href="http://catb.org/jargon/html/G/grep.html" class="external text" title="http://catb.org/jargon/html/G/grep.html" rel="nofollow">"Jargon File 4.4.7: grep"</a><span class="printonly">. <a href="http://catb.org/jargon/html/G/grep.html" class="external free" title="http://catb.org/jargon/html/G/grep.html" rel="nofollow">http://catb.org/jargon/html/G/grep.html</a></span>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Jargon+File+4.4.7%3A+grep&amp;rft.atitle=&amp;rft.aulast=%5B%5BEric+S.+Raymond%7CRaymond%2C+Eric+S.%5D%5D+citing+%5B%5BDennis+Ritchie%5D%5D&amp;rft.au=%5B%5BEric+S.+Raymond%7CRaymond%2C+Eric+S.%5D%5D+citing+%5B%5BDennis+Ritchie%5D%5D&amp;rft.date=2003&amp;rft_id=http%3A%2F%2Fcatb.org%2Fjargon%2Fhtml%2FG%2Fgrep.html&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li id="cite_note-1"><b><a href="#cite_ref-1" title="">^</a></b> <cite style="font-style:normal" class="web" id="CITEREF.5B.5BLarry_Wall.7CWall.2C_Larry.5D.5D_and_the_Perl_5_development_team2006"><a href="/wiki/Larry_Wall" title="Larry Wall">Wall, Larry</a> and the Perl 5 development team (2006). <a href="http://perldoc.perl.org/perlre.html" class="external text" title="http://perldoc.perl.org/perlre.html" rel="nofollow">"perlre: Perl regular expressions"</a><span class="printonly">. <a href="http://perldoc.perl.org/perlre.html" class="external free" title="http://perldoc.perl.org/perlre.html" rel="nofollow">http://perldoc.perl.org/perlre.html</a></span>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=perlre%3A+Perl+regular+expressions&amp;rft.atitle=&amp;rft.aulast=%5B%5BLarry+Wall%7CWall%2C+Larry%5D%5D+and+the+Perl+5+development+team&amp;rft.au=%5B%5BLarry+Wall%7CWall%2C+Larry%5D%5D+and+the+Perl+5+development+team&amp;rft.date=2006&amp;rft_id=http%3A%2F%2Fperldoc.perl.org%2Fperlre.html&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li id="cite_note-Apocalypse5-2">^ <a href="#cite_ref-Apocalypse5_2-0" title=""><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Apocalypse5_2-1" title=""><sup><i><b>b</b></i></sup></a> <cite style="font-style:normal" class="web" id="CITEREF.5B.5BLarry_Wall.7CWall.2C_Larry.5D.5D2002"><a href="/wiki/Larry_Wall" title="Larry Wall">Wall, Larry</a> (2002-06-04). <a href="http://dev.perl.org/perl6/doc/design/apo/A05.html" class="external text" title="http://dev.perl.org/perl6/doc/design/apo/A05.html" rel="nofollow">"Apocalypse 5: Pattern Matching"</a><span class="printonly">. <a href="http://dev.perl.org/perl6/doc/design/apo/A05.html" class="external free" title="http://dev.perl.org/perl6/doc/design/apo/A05.html" rel="nofollow">http://dev.perl.org/perl6/doc/design/apo/A05.html</a></span>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Apocalypse+5%3A+Pattern+Matching&amp;rft.atitle=&amp;rft.aulast=%5B%5BLarry+Wall%7CWall%2C+Larry%5D%5D&amp;rft.au=%5B%5BLarry+Wall%7CWall%2C+Larry%5D%5D&amp;rft.date=2002-06-04&amp;rft_id=http%3A%2F%2Fdev.perl.org%2Fperl6%2Fdoc%2Fdesign%2Fapo%2FA05.html&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li id="cite_note-3"><b><a href="#cite_ref-3" title="">^</a></b> The Single Unix Specification (Version 2)</li>
<li id="cite_note-4"><b><a href="#cite_ref-4" title="">^</a></b> <cite style="font-style:normal" class="web" id="CITEREFCox.2C_Russ2007">Cox, Russ (2007). <a href="http://swtch.com/~rsc/regexp/regexp1.html" class="external text" title="http://swtch.com/~rsc/regexp/regexp1.html" rel="nofollow">"Regular Expression Matching Can Be Simple and Fast"</a><span class="printonly">. <a href="http://swtch.com/~rsc/regexp/regexp1.html" class="external free" title="http://swtch.com/~rsc/regexp/regexp1.html" rel="nofollow">http://swtch.com/~rsc/regexp/regexp1.html</a></span>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Regular+Expression+Matching+Can+Be+Simple+and+Fast&amp;rft.atitle=&amp;rft.aulast=Cox%2C+Russ&amp;rft.au=Cox%2C+Russ&amp;rft.date=2007&amp;rft_id=http%3A%2F%2Fswtch.com%2F%7Ersc%2Fregexp%2Fregexp1.html&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li id="cite_note-5"><b><a href="#cite_ref-5" title="">^</a></b> <cite style="font-style:normal" class="web" id="CITEREFVille_Laurikari">Ville Laurikari. <a href="http://www.laurikari.net/tre/" class="external text" title="http://www.laurikari.net/tre/" rel="nofollow">"LibTRE"</a><span class="printonly">. <a href="http://www.laurikari.net/tre/" class="external free" title="http://www.laurikari.net/tre/" rel="nofollow">http://www.laurikari.net/tre/</a></span>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=LibTRE&amp;rft.atitle=&amp;rft.aulast=Ville+Laurikari&amp;rft.au=Ville+Laurikari&amp;rft_id=http%3A%2F%2Fwww.laurikari.net%2Ftre%2F&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
</ol>
</div>
<!-- References section: anchor target, heading with its edit link, and the list of cited works.
     Each citation is followed by a hidden COinS span (class "Z3988") whose title attribute
     carries OpenURL key/value metadata. Those values must be plain, URL-encoded text:
     no wiki link markup, no HTML entities, and for genre=bookitem the book title goes in
     rft.btitle while the chapter/section title goes in rft.atitle. -->
<p><a name="References" id="References"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=18" title="Edit section: References">edit</a>]</span> <span class="mw-headline">References</span></h2>
<div class="references-small" style="margin-left:1.5em;">
<ul>
<li><cite style="font-style:normal" class=""><a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html" class="external text" title="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html" rel="nofollow">"Regular Expressions"</a>, <i>The Single UNIX Specification, Version 2</i>, The Open Group, 1997<span class="printonly">, <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html" class="external free" title="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html" rel="nofollow">http://www.opengroup.org/onlinepubs/007908799/xbd/re.html</a></span></cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=The+Single+UNIX+Specification%2C+Version+2&amp;rft.atitle=Regular+Expressions&amp;rft.date=1997&amp;rft.pub=The+Open+Group&amp;rft_id=http%3A%2F%2Fwww.opengroup.org%2Fonlinepubs%2F007908799%2Fxbd%2Fre.html&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li><cite style="font-style:normal" class="book" id="CITEREFForta"><a href="/wiki/Ben_Forta" title="Ben Forta">Forta, Ben</a>. <i>Sams Teach Yourself Regular Expressions in 10 Minutes</i>. Sams. <a href="/wiki/Special:BookSources/0672325667" class="internal">ISBN 0-672-32566-7</a>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Sams+Teach+Yourself+Regular+Expressions+in+10+Minutes&amp;rft.aulast=Forta&amp;rft.aufirst=Ben&amp;rft.au=Forta%2C+Ben&amp;rft.pub=Sams&amp;rft.isbn=0-672-32566-7&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<!-- rft.btitle and rft.pub below hold the plain title and publisher name (the link targets live in the visible citation). -->
<li><cite style="font-style:normal" class="book" id="CITEREFFriedl"><a href="/wiki/Jeffrey_Friedl" title="Jeffrey Friedl">Friedl, Jeffrey</a>. <i><a href="http://regex.info/" class="external text" title="http://regex.info/" rel="nofollow">Mastering Regular Expressions</a></i>. <a href="/wiki/O%27Reilly_Media" title="O'Reilly Media">O'Reilly</a>. <a href="/wiki/Special:BookSources/0596002890" class="internal">ISBN 0-596-00289-0</a>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Mastering+Regular+Expressions&amp;rft.aulast=Friedl&amp;rft.aufirst=Jeffrey&amp;rft.au=Friedl%2C+Jeffrey&amp;rft.pub=O%27Reilly&amp;rft.isbn=0-596-00289-0&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li><cite style="font-style:normal" class="book" id="CITEREFHabibi"><a href="/w/index.php?title=Mehran_Habibi&amp;action=edit&amp;redlink=1" class="new" title="Mehran Habibi (page does not exist)">Habibi, Mehran</a>. <i>Real World Regular Expressions with Java 1.4</i>. Springer. <a href="/wiki/Special:BookSources/1590591070" class="internal">ISBN 1-59059-107-0</a>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Real+World+Regular+Expressions+with+Java+1.4&amp;rft.aulast=Habibi&amp;rft.aufirst=Mehran&amp;rft.au=Habibi%2C+Mehran&amp;rft.pub=Springer&amp;rft.isbn=1-59059-107-0&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<!-- The cite id below is kept as generated so existing in-page anchors keep working; co-authors are listed as separate rft.au values. -->
<li><cite style="font-style:normal" class="book" id="CITEREFLiger.5B.5BCraig_McQueen.5D.5D.2C_.5B.5BPaul_Wilton.5D.5D"><a href="/w/index.php?title=Francois_Liger&amp;action=edit&amp;redlink=1" class="new" title="Francois Liger (page does not exist)">Liger, Francois</a>; <a href="/w/index.php?title=Craig_McQueen&amp;action=edit&amp;redlink=1" class="new" title="Craig McQueen (page does not exist)">Craig McQueen</a>, <a href="/w/index.php?title=Paul_Wilton&amp;action=edit&amp;redlink=1" class="new" title="Paul Wilton (page does not exist)">Paul Wilton</a>. <i>Visual Basic .NET Text Manipulation Handbook</i>. <a href="/wiki/Wrox_Press" title="Wrox Press">Wrox Press</a>. <a href="/wiki/Special:BookSources/1861007302" class="internal">ISBN 1-86100-730-2</a>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Visual+Basic+.NET+Text+Manipulation+Handbook&amp;rft.aulast=Liger&amp;rft.aufirst=Francois&amp;rft.au=Liger%2C+Francois&amp;rft.au=Craig+McQueen&amp;rft.au=Paul+Wilton&amp;rft.pub=Wrox+Press&amp;rft.isbn=1-86100-730-2&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<!-- rft.pages below carries the bare page range; the "pp." prefix and non-breaking space belong to the visible text only. -->
<li><cite style="font-style:normal" class="book" id="CITEREFSipser"><a href="/wiki/Michael_Sipser" title="Michael Sipser">Sipser, Michael</a>. "Chapter 1: Regular Languages". <i>Introduction to the Theory of Computation</i>. PWS Publishing. pp.&#160;31–90. <a href="/wiki/Special:BookSources/053494728X" class="internal">ISBN 0-534-94728-X</a>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Introduction+to+the+Theory+of+Computation&amp;rft.atitle=Chapter+1%3A+Regular+Languages&amp;rft.aulast=Sipser&amp;rft.aufirst=Michael&amp;rft.au=Sipser%2C+Michael&amp;rft.pages=31%E2%80%9390&amp;rft.pub=PWS+Publishing&amp;rft.isbn=0-534-94728-X&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
<li><cite style="font-style:normal" class="book" id="CITEREFStubblebine"><a href="/w/index.php?title=Tony_Stubblebine&amp;action=edit&amp;redlink=1" class="new" title="Tony Stubblebine (page does not exist)">Stubblebine, Tony</a>. <i>Regular Expression Pocket Reference</i>. O'Reilly. <a href="/wiki/Special:BookSources/059600415X" class="internal">ISBN 0-596-00415-X</a>.</cite><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Regular+Expression+Pocket+Reference&amp;rft.aulast=Stubblebine&amp;rft.aufirst=Tony&amp;rft.au=Stubblebine%2C+Tony&amp;rft.pub=O%27Reilly&amp;rft.isbn=0-596-00415-X&amp;rfr_id=info:sid/en.wikipedia.org:Regular_expression"><span style="display: none;">&#160;</span></span></li>
</ul>
</div>
<!-- External links section: anchor target, heading with its edit link,
     the Wikibooks sister-project box, and the list of outbound links. -->
<p><a name="External_links" id="External_links"></a></p>
<h2><span class="editsection">[<a href="/w/index.php?title=Regular_expression&amp;action=edit&amp;section=19" title="Edit section: External links">edit</a>]</span> <span class="mw-headline">External links</span></h2>
<!-- Sister-project box: logo cell on the left, descriptive text and book link on the right. -->
<table class="metadata plainlinks mbox-small" style="border:1px solid #aaa; background-color:#f9f9f9;">
	<tr>
		<td class="mbox-image"><a href="http://en.wikibooks.org/wiki/Special:Search/Regular_expression" title="b:Special:Search/Regular expression"><img alt="Sister project" src="http://upload.wikimedia.org/wikipedia/commons/thumb/d/df/Wikibooks-logo-en-noslogan.svg/40px-Wikibooks-logo-en-noslogan.svg.png" width="40" height="40" border="0" /></a></td>
		<td class="mbox-text" style=""><a href="/wiki/Wikibooks" title="Wikibooks">Wikibooks</a> has a book on the topic of
			<div style="margin-left:10px;"><i><b><a href="http://en.wikibooks.org/wiki/Regular_Expressions" class="extiw" title="wikibooks:Regular Expressions">Regular Expressions</a></b></i></div>
		</td>
	</tr>
</table>
<!-- Outbound links; each carries rel="nofollow" and repeats its URL in the title attribute. -->
<ul>
	<li><a href="http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Reference:Global_Objects:RegExp" class="external text" title="http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Reference:Global_Objects:RegExp" rel="nofollow">JavaScript RegExp Object Reference</a> at the <a href="/wiki/Mozilla_Developer_Center" title="Mozilla Developer Center">Mozilla Developer Center</a></li>
	<li><a href="http://java.sun.com/docs/books/tutorial/essential/regex/index.html" class="external text" title="http://java.sun.com/docs/books/tutorial/essential/regex/index.html" rel="nofollow">Java Tutorials: Regular Expressions</a></li>
	<li><a href="http://perldoc.perl.org/perlre.html" class="external text" title="http://perldoc.perl.org/perlre.html" rel="nofollow">Perl Regular Expressions documentation</a></li>
	<li><a href="http://msdn2.microsoft.com/en-us/library/ms974570.aspx" class="external text" title="http://msdn2.microsoft.com/en-us/library/ms974570.aspx" rel="nofollow">VBScript and Regular Expressions</a></li>
	<li><a href="http://msdn.microsoft.com/en-us/library/hs600312.aspx" class="external text" title="http://msdn.microsoft.com/en-us/library/hs600312.aspx" rel="nofollow">.NET Framework Regular Expressions</a></li>
	<li><a href="http://www.dmoz.org/Computers/Programming/Languages/Regular_Expressions/" class="external text" title="http://www.dmoz.org/Computers/Programming/Languages/Regular_Expressions/" rel="nofollow">Regular Expressions</a> at the <a href="/wiki/Open_Directory_Project" title="Open Directory Project">Open Directory Project</a></li>
	<li><a href="http://www.regular-expressions.info/" class="external text" title="http://www.regular-expressions.info/" rel="nofollow">Regular-Expressions.info</a>&#160;— tutorial and reference which covers many popular regex flavors</li>
	<li><a href="http://billposer.org/Linguistics/Computation/Resources.html#patterns" class="external text" title="http://billposer.org/Linguistics/Computation/Resources.html#patterns" rel="nofollow">Pattern matching tools and libraries</a></li>
	<li><a href="http://www.seeingwithc.org/topic7html.html" class="external text" title="http://www.seeingwithc.org/topic7html.html" rel="nofollow">Regular Expressions writeup explaining math. and computer notations.</a></li>
	<li><a href="http://regexlib.com/" class="external text" title="http://regexlib.com/" rel="nofollow">Regular Expressions Library</a></li>
</ul>


<!-- 
NewPP limit report
Preprocessor node count: 5737/1000000
Post-expand include size: 36066/2048000 bytes
Template argument size: 11378/2048000 bytes
Expensive parser function count: 0/500
-->

<!-- Saved in parser cache with key enwiki:pcache:idhash:25717-0!1!0!default!!en!2 and timestamp 20090404014020 -->
<!-- Attribution line giving the full URL this article was retrieved from. -->
<div class="printfooter">
	Retrieved from "<a href="http://en.wikipedia.org/wiki/Regular_expression">http://en.wikipedia.org/wiki/Regular_expression</a>"
</div>
			<!-- Category bar: link to the category index followed by this article's categories, separated by " | ". -->
			<div id="catlinks" class="catlinks">
				<div id="mw-normal-catlinks">
					<a href="/wiki/Special:Categories" title="Special:Categories">Categories</a>:&#32;<span dir="ltr"><a href="/wiki/Category:Formal_languages" title="Category:Formal languages">Formal languages</a></span> | <span dir="ltr"><a href="/wiki/Category:Pattern_matching" title="Category:Pattern matching">Pattern matching</a></span> | <span dir="ltr"><a href="/wiki/Category:Programming_language_topics" title="Category:Programming language topics">Programming language topics</a></span>
				</div>
			</div>
			<!-- end content -->
						<div class="visualClear"></div>
		</div>
	</div>
		</div>
		<div id="column-one">
	<!-- "Views" portlet: tabs for the article, its talk page, editing and history.
	     The bracketed letter in each title mirrors that link's accesskey. -->
	<div id="p-cactions" class="portlet">
		<h5>Views</h5>
		<div class="pBody">
			<ul>
				<li id="ca-nstab-main" class="selected"><a href="/wiki/Regular_expression" title="View the content page [c]" accesskey="c">Article</a></li>
				<li id="ca-talk"><a href="/wiki/Talk:Regular_expression" title="Discussion about the content page [t]" accesskey="t">Discussion</a></li>
				<li id="ca-edit"><a href="/w/index.php?title=Regular_expression&amp;action=edit" title="You can edit this page. &#10;Please use the preview button before saving. [e]" accesskey="e">Edit this page</a></li>
				<li id="ca-history"><a href="/w/index.php?title=Regular_expression&amp;action=history" title="Past versions of this page [h]" accesskey="h">History</a></li>
			</ul>
		</div>
	</div>
	<!-- "Personal tools" portlet: a single log-in link that returns to this article afterwards. -->
	<div id="p-personal" class="portlet">
		<h5>Personal tools</h5>
		<div class="pBody">
			<ul>
				<li id="pt-login">
					<a href="/w/index.php?title=Special:UserLogin&amp;returnto=Regular_expression" title="You are encouraged to log in; however, it is not mandatory. [o]" accesskey="o">Log in / create account</a>
				</li>
			</ul>
		</div>
	</div>
	<!-- Site logo: an empty link to the main page whose image is supplied as a CSS background.
	     It deliberately carries no accesskey: the "Main page" navigation link already binds "z",
	     and assigning the same key to two elements makes the shortcut ambiguous in browsers. -->
	<div class="portlet" id="p-logo">
		<a style="background-image: url(http://upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png);" href="/wiki/Main_Page" title="Visit the main page"></a>
	</div>
	<!-- Calls fixalpha() only when window.isMSIE55 is set; both names are defined outside this section
	     (presumably a PNG transparency workaround for IE 5.5; confirm against the skin scripts). -->
	<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
	<!-- "Navigation" portlet: main page, contents, featured content, current events and random article. -->
	<div class="generated-sidebar portlet" id="p-navigation">
		<h5>Navigation</h5>
		<div class="pBody">
			<ul>
				<li id="n-mainpage-description"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z">Main page</a></li>
				<li id="n-contents"><a href="/wiki/Portal:Contents" title="Guides to browsing Wikipedia">Contents</a></li>
				<li id="n-featuredcontent"><a href="/wiki/Portal:Featured_content" title="Featured content — the best of Wikipedia">Featured content</a></li>
				<li id="n-currentevents"><a href="/wiki/Portal:Current_events" title="Find background information on current events">Current events</a></li>
				<li id="n-randompage"><a href="/wiki/Special:Random" title="Load a random article [x]" accesskey="x">Random article</a></li>
			</ul>
		</div>
	</div>
	<!-- "Search" portlet. The form submits to /w/index.php with a hidden title=Special:Search field;
	     the "go" and "fulltext" submit buttons tell the server which kind of lookup was requested.
	     The heading doubles as the label for the text input. -->
	<div id="p-search" class="portlet">
		<h5><label for="searchInput">Search</label></h5>
		<div id="searchBody" class="pBody">
			<form action="/w/index.php" id="searchform"><div>
				<input type="hidden" name="title" value="Special:Search"/>
				<input id="searchInput" name="search" type="text" title="Search Wikipedia [f]" accesskey="f" value="" />
				<input type="submit" name="go" class="searchButton" id="searchGoButton" value="Go" title="Go to a page with this exact name if one exists" />&nbsp;
				<input type="submit" name="fulltext" class="searchButton" id="mw-searchButton" value="Search" title="Search Wikipedia for this text" />
			</div></form>
		</div>
	</div>
	<!-- "Interaction" portlet: project information, community pages, recent changes, contact, donation and help links. -->
	<div class="generated-sidebar portlet" id="p-interaction">
		<h5>Interaction</h5>
		<div class="pBody">
			<ul>
				<li id="n-aboutsite"><a href="/wiki/Wikipedia:About" title="Find out about Wikipedia">About Wikipedia</a></li>
				<li id="n-portal"><a href="/wiki/Wikipedia:Community_portal" title="About the project, what you can do, where to find things">Community portal</a></li>
				<li id="n-recentchanges"><a href="/wiki/Special:RecentChanges" title="The list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li>
				<li id="n-contact"><a href="/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia">Contact Wikipedia</a></li>
				<li id="n-sitesupport"><a href="http://wikimediafoundation.org/wiki/Donate" title="Support us">Donate to Wikipedia</a></li>
				<li id="n-help"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia">Help</a></li>
			</ul>
		</div>
	</div>
	<!-- "Toolbox" portlet: page-specific tools (backlinks, related changes, print view,
	     permanent link and citation for revision 281174402) plus upload and special pages. -->
	<div class="portlet" id="p-tb">
		<h5>Toolbox</h5>
		<div class="pBody">
			<ul>
				<li id="t-whatlinkshere"><a href="/wiki/Special:WhatLinksHere/Regular_expression" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j">What links here</a></li>
				<li id="t-recentchangeslinked"><a href="/wiki/Special:RecentChangesLinked/Regular_expression" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li>
				<li id="t-upload"><a href="/wiki/Wikipedia:Upload" title="Upload files [u]" accesskey="u">Upload file</a></li>
				<li id="t-specialpages"><a href="/wiki/Special:SpecialPages" title="List of all special pages [q]" accesskey="q">Special pages</a></li>
				<li id="t-print"><a href="/w/index.php?title=Regular_expression&amp;printable=yes" rel="alternate" title="Printable version of this page [p]" accesskey="p">Printable version</a></li>
				<li id="t-permalink"><a href="/w/index.php?title=Regular_expression&amp;oldid=281174402" title="Permanent link to this version of the page">Permanent link</a></li>
				<li id="t-cite"><a href="/w/index.php?title=Special:Cite&amp;page=Regular_expression&amp;id=281174402">Cite this page</a></li>
			</ul>
		</div>
	</div>
	<!-- "Languages" portlet: links to this article on other-language Wikipedias.
	     Each link declares hreflang (language of the target page) and lang/xml:lang
	     (language of the link text, which is written in that language) so that screen
	     readers and search engines can switch language correctly. Both lang and xml:lang
	     are given, matching the root element of this XHTML document. -->
	<div id="p-lang" class="portlet">
		<h5>Languages</h5>
		<div class="pBody">
			<ul>
				<li class="interwiki-ar"><a href="http://ar.wikipedia.org/wiki/%D8%AA%D8%B9%D8%A8%D9%8A%D8%B1_%D9%82%D9%8A%D8%A7%D8%B3%D9%8A" hreflang="ar" lang="ar" xml:lang="ar">العربية</a></li>
				<li class="interwiki-bg"><a href="http://bg.wikipedia.org/wiki/%D0%A0%D0%B5%D0%B3%D1%83%D0%BB%D1%8F%D1%80%D0%B5%D0%BD_%D0%B8%D0%B7%D1%80%D0%B0%D0%B7" hreflang="bg" lang="bg" xml:lang="bg">Български</a></li>
				<li class="interwiki-ca"><a href="http://ca.wikipedia.org/wiki/Expressi%C3%B3_regular" hreflang="ca" lang="ca" xml:lang="ca">Català</a></li>
				<li class="interwiki-cs"><a href="http://cs.wikipedia.org/wiki/Regul%C3%A1rn%C3%AD_v%C3%BDraz" hreflang="cs" lang="cs" xml:lang="cs">Česky</a></li>
				<li class="interwiki-da"><a href="http://da.wikipedia.org/wiki/Regul%C3%A6re_udtryk" hreflang="da" lang="da" xml:lang="da">Dansk</a></li>
				<li class="interwiki-de"><a href="http://de.wikipedia.org/wiki/Regul%C3%A4rer_Ausdruck" hreflang="de" lang="de" xml:lang="de">Deutsch</a></li>
				<li class="interwiki-el"><a href="http://el.wikipedia.org/wiki/%CE%9A%CE%B1%CE%BD%CE%BF%CE%BD%CE%B9%CE%BA%CE%AE_%CE%AD%CE%BA%CF%86%CF%81%CE%B1%CF%83%CE%B7" hreflang="el" lang="el" xml:lang="el">Ελληνικά</a></li>
				<li class="interwiki-es"><a href="http://es.wikipedia.org/wiki/Expresi%C3%B3n_regular" hreflang="es" lang="es" xml:lang="es">Español</a></li>
				<li class="interwiki-eo"><a href="http://eo.wikipedia.org/wiki/Regula_esprimo" hreflang="eo" lang="eo" xml:lang="eo">Esperanto</a></li>
				<li class="interwiki-eu"><a href="http://eu.wikipedia.org/wiki/Adierazpen_erregular" hreflang="eu" lang="eu" xml:lang="eu">Euskara</a></li>
				<li class="interwiki-fr"><a href="http://fr.wikipedia.org/wiki/Expression_rationnelle" hreflang="fr" lang="fr" xml:lang="fr">Français</a></li>
				<li class="interwiki-gl"><a href="http://gl.wikipedia.org/wiki/Expresi%C3%B3n_regular" hreflang="gl" lang="gl" xml:lang="gl">Galego</a></li>
				<li class="interwiki-ko"><a href="http://ko.wikipedia.org/wiki/%EC%A0%95%EA%B7%9C_%ED%91%9C%ED%98%84%EC%8B%9D" hreflang="ko" lang="ko" xml:lang="ko">한국어</a></li>
				<li class="interwiki-hr"><a href="http://hr.wikipedia.org/wiki/Regularni_izraz" hreflang="hr" lang="hr" xml:lang="hr">Hrvatski</a></li>
				<li class="interwiki-is"><a href="http://is.wikipedia.org/wiki/Regluleg_seg%C3%B0" hreflang="is" lang="is" xml:lang="is">Íslenska</a></li>
				<li class="interwiki-it"><a href="http://it.wikipedia.org/wiki/Espressione_regolare" hreflang="it" lang="it" xml:lang="it">Italiano</a></li>
				<li class="interwiki-he"><a href="http://he.wikipedia.org/wiki/%D7%91%D7%99%D7%98%D7%95%D7%99_%D7%A8%D7%92%D7%95%D7%9C%D7%A8%D7%99" hreflang="he" lang="he" xml:lang="he">עברית</a></li>
				<li class="interwiki-hu"><a href="http://hu.wikipedia.org/wiki/Szab%C3%A1lyos_kifejez%C3%A9s" hreflang="hu" lang="hu" xml:lang="hu">Magyar</a></li>
				<li class="interwiki-mk"><a href="http://mk.wikipedia.org/wiki/%D0%A0%D0%B5%D0%B3%D1%83%D0%BB%D0%B0%D1%80%D0%BD%D0%B8_%D0%B8%D0%B7%D1%80%D0%B0%D0%B7%D0%B8" hreflang="mk" lang="mk" xml:lang="mk">Македонски</a></li>
				<li class="interwiki-nl"><a href="http://nl.wikipedia.org/wiki/Reguliere_expressie" hreflang="nl" lang="nl" xml:lang="nl">Nederlands</a></li>
				<li class="interwiki-ja"><a href="http://ja.wikipedia.org/wiki/%E6%AD%A3%E8%A6%8F%E8%A1%A8%E7%8F%BE" hreflang="ja" lang="ja" xml:lang="ja">日本語</a></li>
				<!-- The label is wrapped in U+202A (left-to-right embedding) and U+202C (pop directional formatting),
				     written as character references so the invisible characters survive editing. -->
				<li class="interwiki-no"><a href="http://no.wikipedia.org/wiki/Regul%C3%A6rt_uttrykk" hreflang="nb" lang="nb" xml:lang="nb">&#x202A;Norsk (bokmål)&#x202C;</a></li>
				<li class="interwiki-pl"><a href="http://pl.wikipedia.org/wiki/Wyra%C5%BCenie_regularne" hreflang="pl" lang="pl" xml:lang="pl">Polski</a></li>
				<li class="interwiki-pt"><a href="http://pt.wikipedia.org/wiki/Express%C3%A3o_regular" hreflang="pt" lang="pt" xml:lang="pt">Português</a></li>
				<li class="interwiki-ru"><a href="http://ru.wikipedia.org/wiki/%D0%A0%D0%B5%D0%B3%D1%83%D0%BB%D1%8F%D1%80%D0%BD%D1%8B%D0%B5_%D0%B2%D1%8B%D1%80%D0%B0%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F" hreflang="ru" lang="ru" xml:lang="ru">Русский</a></li>
				<li class="interwiki-sk"><a href="http://sk.wikipedia.org/wiki/Regul%C3%A1rny_v%C3%BDraz" hreflang="sk" lang="sk" xml:lang="sk">Slovenčina</a></li>
				<li class="interwiki-sr"><a href="http://sr.wikipedia.org/wiki/Regularni_izraz" hreflang="sr" lang="sr" xml:lang="sr">Српски / Srpski</a></li>
				<li class="interwiki-fi"><a href="http://fi.wikipedia.org/wiki/S%C3%A4%C3%A4nn%C3%B6llinen_lauseke" hreflang="fi" lang="fi" xml:lang="fi">Suomi</a></li>
				<li class="interwiki-sv"><a href="http://sv.wikipedia.org/wiki/Regulj%C3%A4ra_uttryck" hreflang="sv" lang="sv" xml:lang="sv">Svenska</a></li>
				<li class="interwiki-ta"><a href="http://ta.wikipedia.org/wiki/%E0%AE%9A%E0%AF%81%E0%AE%B0%E0%AF%81%E0%AE%99%E0%AF%8D%E0%AE%95%E0%AF%81%E0%AE%B1%E0%AE%BF%E0%AE%A4%E0%AF%8D%E0%AE%A4%E0%AF%8A%E0%AE%9F%E0%AE%B0%E0%AF%8D" hreflang="ta" lang="ta" xml:lang="ta">தமிழ்</a></li>
				<li class="interwiki-th"><a href="http://th.wikipedia.org/wiki/%E0%B8%99%E0%B8%B4%E0%B8%9E%E0%B8%88%E0%B8%99%E0%B9%8C%E0%B8%9B%E0%B8%A3%E0%B8%81%E0%B8%95%E0%B8%B4" hreflang="th" lang="th" xml:lang="th">ไทย</a></li>
				<li class="interwiki-vi"><a href="http://vi.wikipedia.org/wiki/Bi%E1%BB%83u_th%E1%BB%A9c_ch%C3%ADnh_quy" hreflang="vi" lang="vi" xml:lang="vi">Tiếng Việt</a></li>
				<li class="interwiki-tr"><a href="http://tr.wikipedia.org/wiki/D%C3%BCzenlemeli_ifadeler_(Programlama)" hreflang="tr" lang="tr" xml:lang="tr">Türkçe</a></li>
				<li class="interwiki-uk"><a href="http://uk.wikipedia.org/wiki/%D0%A0%D0%B5%D0%B3%D1%83%D0%BB%D1%8F%D1%80%D0%BD%D0%B8%D0%B9_%D0%B2%D0%B8%D1%80%D0%B0%D0%B7" hreflang="uk" lang="uk" xml:lang="uk">Українська</a></li>
				<li class="interwiki-zh"><a href="http://zh.wikipedia.org/wiki/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F" hreflang="zh" lang="zh" xml:lang="zh">中文</a></li>
			</ul>
		</div>
	</div>
		</div><!-- end of the left (by default at least) column -->
			<!-- Float-clearing spacer, then the page footer: two badge images followed by the
			     last-modified note, licence/trademark text and policy links. -->
			<div class="visualClear"></div>
			<div id="footer">
				<div id="f-poweredbyico"><a href="http://www.mediawiki.org/"><img src="/skins-1.5/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" /></a></div>
				<div id="f-copyrightico"><a href="http://wikimediafoundation.org/"><img src="/images/wikimedia-button.png" border="0" alt="Wikimedia Foundation"/></a></div>
				<ul id="f-list">
					<li id="lastmod"> This page was last modified on 1 April 2009, at 22:54.</li>
					<li id="copyright">All text is available under the terms of the <a class="internal" href="http://en.wikipedia.org/wiki/Wikipedia:Text_of_the_GNU_Free_Documentation_License" title="Wikipedia:Text of the GNU Free Documentation License">GNU Free Documentation License</a>. (See <b><a class="internal" href="http://en.wikipedia.org/wiki/Wikipedia:Copyrights" title="Wikipedia:Copyrights">Copyrights</a></b> for details.) <br /> Wikipedia&reg; is a registered trademark of the <a href="http://www.wikimediafoundation.org">Wikimedia Foundation, Inc.</a>, a U.S. registered <a class="internal" href="http://en.wikipedia.org/wiki/501%28c%29#501.28c.29.283.29" title="501(c)(3)">501(c)(3)</a> <a href="http://wikimediafoundation.org/wiki/Deductibility_of_donations">tax-deductible</a> <a class="internal" href="http://en.wikipedia.org/wiki/Non-profit_organization" title="Non-profit organization">nonprofit</a> <a href="http://en.wikipedia.org/wiki/Charitable_organization" title="Charitable organization">charity</a>.<br /></li>
					<li id="privacy"><a href="http://wikimediafoundation.org/wiki/Privacy_policy" title="wikimedia:Privacy policy">Privacy policy</a></li>
					<li id="about"><a href="/wiki/Wikipedia:About" title="Wikipedia:About">About Wikipedia</a></li>
					<li id="disclaimer"><a href="/wiki/Wikipedia:General_disclaimer" title="Wikipedia:General disclaimer">Disclaimers</a></li>
				</ul>
			</div>
</div>

		<script type="text/javascript">
			/* Invoke runOnloadHook() only when it has been defined on window. */
			if (window.runOnloadHook) {
				runOnloadHook();
			}
		</script>
<!-- Served by srv209 in 0.178 secs. --></body></html>
